Created
December 13, 2015 04:07
-
-
Save iheanyi/cba88dddc0638f55306a to your computer and use it in GitHub Desktop.
Elixir stuff
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule API do
  @moduledoc """
  Scraper for the ND Class Search servlet.

  The module downloads the search form to discover the available terms and
  departments, posts the form for a term/department pair and converts every
  row of the result table into a plain map. Pages are fetched with
  `HTTPoison` and queried with `Floki`.
  """

  alias Utils

  @base_url "https://class-search.nd.edu/reg/srch/ClassSearchServlet"

  @doc """
  Starts the search process for `page` (defaults to `:initial`).

  Delegates to `API.Search.start_link/1` and returns its result unchanged.
  """
  def initialize(page \\ :initial) do
    API.Search.start_link(page)
  end

  @doc """
  Fetches the initial Class Search page and returns the response body.
  """
  def fetch_initial_page do
    HTTPoison.get!(@base_url).body
  end

  @doc """
  Fetches the list of terms offered by the search form.

  Returns one `%{name: name, value: value}` map per `<option>` of the `TERM`
  select; `name` is the option text with surrounding whitespace removed.
  """
  def fetch_terms do
    fetch_initial_page()
    |> select_options("TERM")
    |> Enum.map(fn {value, text} -> %{name: String.trim(text), value: value} end)
  end

  @doc """
  Fetches the list of departments offered by the search form.

  Returns one `%{name: name, value: value}` map per `<option>` of the `SUBJ`
  select. Unlike `fetch_terms/0`, the option text is returned untrimmed.
  """
  def fetch_departments do
    fetch_initial_page()
    |> select_options("SUBJ")
    |> Enum.map(fn {value, text} -> %{name: text, value: value} end)
  end

  @doc """
  Processes the HTML for a course row.

  `first_section` is the list of cell nodes of one result row, each a
  `{tag, attributes, children}` tuple. Cells are read by index:

    * 0  - course number and section ("NUM - SECTION"); the last child links
           to the books for that section
    * 1  - course title
    * 2  - number of credits
    * 3  - seat status (not read)
    * 4  - maximum number of seats
    * 5  - open number of seats
    * 6  - cross-listing (not read)
    * 7  - CRN (printed only)
    * 8  - syllabus (not read)
    * 9  - instructor(s)
    * 10 - meeting times; a section may have more than one
    * 11 - begin date
    * 12 - end date
    * 13 - location

  Prints a summary of the section to standard output and returns a map with
  the keys `:name`, `:section`, `:course_number`, `:timeslots`, `:times`,
  `:credits`, `:open_seats`, `:max_seats`, `:begin_date`, `:end_date`,
  `:location`, `:books_link` and `:instructors`.

  Raises if the row does not have the expected shape (for example a
  `MatchError` when a cell is missing).
  """
  def process_section_html(first_section) do
    # First cell: "<number> - <section>" text in the first child, books link
    # in the last child.
    header_cell = cell_children(first_section, 0)
    {_, _, number_and_section_text} = List.first(header_cell)
    {_, books_link_attrs, _} = List.last(header_cell)
    {_, course_books_link} = List.first(books_link_attrs)

    [course_num, course_section] =
      number_and_section_text
      |> List.first()
      |> String.split(" - ", trim: true)

    title_cell = cell_children(first_section, 1)
    course_title = title_cell |> List.first() |> to_string()
    print_title_diagnostics(course_title, title_cell)

    credits = first_child(first_section, 2)
    max_seats = first_child(first_section, 4)
    open_seats = first_child(first_section, 5)
    course_reg_number = first_section |> first_child(7) |> String.trim()

    # A section can list several instructors; each child of the cell is
    # mapped to one display name.
    instructors =
      first_section
      |> cell_children(9)
      |> Enum.map(&instructor_name/1)

    # `times` keeps every meeting time of the cell, `timeslots` only the
    # first one.
    time_cell = cell_children(first_section, 10)
    times = Utils.filter_binary_and_strip(time_cell)
    timeslots = time_cell |> List.first() |> String.trim()

    # Only the first begin date, end date and location are kept even when
    # the section has several.
    begin_date = first_section |> first_child(11) |> String.trim()
    end_date = first_section |> first_child(12) |> String.trim()
    location = first_section |> first_child(13) |> String.trim()

    joined_times = Enum.join(times, ", ")

    IO.puts(course_num)
    IO.puts(course_section)
    IO.puts(to_string(course_title))
    IO.puts("#{course_num} - #{course_section} - #{course_title}")
    IO.puts("CRN #{course_reg_number}")
    IO.puts("Timeslots: #{timeslots}")
    IO.puts("Times: #{joined_times}")
    IO.puts("Instructor(s): #{Enum.join(instructors, ", ")}")
    IO.puts("#{credits} credits, #{open_seats}/#{max_seats} seats left")
    IO.puts("Starts #{begin_date} and Ends #{end_date}")
    IO.puts("Location: #{location}")
    IO.puts("Books Link: #{course_books_link}")

    %{
      name: course_title,
      section: course_section,
      course_number: course_num,
      timeslots: timeslots,
      times: joined_times,
      credits: credits,
      open_seats: open_seats,
      max_seats: max_seats,
      begin_date: begin_date,
      end_date: end_date,
      location: location,
      books_link: course_books_link,
      instructors: instructors
    }
  end

  @doc """
  Fetches and parses the sections offered by `dept` in `term`.

  Posts the search form for the given term and department values and runs
  `process_section_html/1` on every row of the `#resulttable` body. Returns
  the list of section maps (not the raw HTML).
  """
  def fetch_term_dept_html(term, dept) do
    headers = %{"Content-type" => "application/x-www-form-urlencoded;charset=utf-8"}

    form = [
      TERM: term,
      DIVS: "A",
      CAMPUS: "M",
      SUBJ: dept,
      ATTR: "0ANY",
      CREDIT: "A"
    ]

    response = HTTPoison.post!(@base_url, {:form, form}, headers)

    response.body
    |> Floki.find("#resulttable tbody tr")
    |> Enum.map(fn {_tag, _attrs, cells} -> process_section_html(cells) end)
  end

  @doc """
  Fetches the courses for every single term and department.

  Prints each term value before its departments are fetched. The parsed
  sections are not collected (their summaries are printed by
  `process_section_html/1`); the function returns `:ok`.
  """
  def fetch_all_courses() do
    terms = fetch_terms()
    depts = fetch_departments()

    Enum.each(terms, fn term ->
      IO.puts(term.value)

      Enum.each(depts, fn dept ->
        fetch_term_dept_html(term.value, dept.value)
      end)
    end)
  end

  @doc """
  Placeholder for course-page parsing; not implemented yet.

  Ignores its argument and returns `nil`.
  """
  def process_course_html(_html) do
    nil
  end

  @doc """
  Fetches the sections of one hard-coded term/department pair.

  The term (`"201520"`) and department (`"AL"`) are fixed rather than read
  from the search form. Returns the first parsed section, or `nil` when the
  result table is empty.
  """
  def fetch_first() do
    first_term_value = "201520"
    first_dept_value = "AL"

    IO.puts("Fetching Term Department stuff")
    IO.puts(first_term_value)

    first_term_value
    |> fetch_term_dept_html(first_dept_value)
    |> List.first()
  end

  # Returns `{value, text}` for every <option> of the named <select> in `html`.
  defp select_options(html, select_name) do
    html
    |> Floki.find("select[name=#{select_name}] option")
    |> Enum.map(fn option ->
      value = option |> Floki.attribute("value") |> List.first()
      {value, Floki.text(option)}
    end)
  end

  # Returns the children of the cell at `index` in `row`; raises a
  # `MatchError` when the cell is missing or is not a 3-element node tuple.
  defp cell_children(row, index) do
    {_tag, _attrs, children} = Enum.at(row, index)
    children
  end

  # Returns the first child of the cell at `index` (`nil` for an empty cell).
  defp first_child(row, index) do
    row |> cell_children(index) |> List.first()
  end

  # Prints validity diagnostics for a course title, then the title itself.
  defp print_title_diagnostics(course_title, title_cell) do
    IO.puts(String.valid?(course_title))

    if not String.valid?(course_title) do
      IO.puts("Invalid course title found!")
      IO.puts(String.printable?(course_title))
      IO.puts(String.chunk(course_title, :valid))
      IO.puts(String.chunk(course_title, :printable))
      IO.puts(title_cell)
    end

    IO.inspect(course_title)
    IO.puts(course_title)
  end

  # Converts one child of the instructor cell into a display name.
  #
  # A tuple is a tag whose first text child reads "Last, First" and is
  # turned into "First Last". The tag's link also carries a `P=<id>`
  # instructor id, which is not extracted here. Anything that is not a tuple
  # is treated as an unassigned instructor.
  defp instructor_name(tag) when is_tuple(tag) do
    {_tag_name, _attrs, children} = tag
    name_text = List.first(children)
    IO.puts(name_text)

    [last_name, first_name] =
      name_text
      |> String.trim()
      |> String.split(", ", trim: true)

    "#{first_name} #{last_name}"
  end

  defp instructor_name(_text), do: "TBA"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment