Created
October 25, 2012 18:59
-
-
Save iheanyi/3954706 to your computer and use it in GitHub Desktop.
Parses the Notre Dame Class Search results page.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package edu.nd.iekechuk.JavaClassParse; | |
import java.io.*; | |
import java.net.MalformedURLException; | |
import java.util.ArrayList; | |
import org.w3c.dom.*; | |
import javax.xml.parsers.*; | |
import org.jsoup.Jsoup; | |
import org.jsoup.nodes.Document; | |
import org.jsoup.nodes.Element; | |
import org.jsoup.select.Elements; | |
import com.gargoylesoftware.htmlunit.ElementNotFoundException; | |
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException; | |
import com.gargoylesoftware.htmlunit.WebClient; | |
import com.gargoylesoftware.htmlunit.html.HtmlForm; | |
import com.gargoylesoftware.htmlunit.html.HtmlOption; | |
import com.gargoylesoftware.htmlunit.html.HtmlPage; | |
import com.gargoylesoftware.htmlunit.html.HtmlSelect; | |
public class ClassParse { | |
public ClassParse() { | |
// TODO Auto-generated constructor stub | |
} | |
public void testClient() { | |
WebClient wc = new WebClient(); | |
wc.setThrowExceptionOnScriptError(false); | |
HtmlPage currentPage = null; | |
try { | |
currentPage = (HtmlPage) wc.getPage("https://was.nd.edu/reg/srch/ClassSearchServlet"); | |
} catch (FailingHttpStatusCodeException e) { | |
// TODO Auto-generated catch block | |
e.printStackTrace(); | |
} catch (MalformedURLException e) { | |
// TODO Auto-generated catch block | |
e.printStackTrace(); | |
} catch (IOException e) { | |
// TODO Auto-generated catch block | |
e.printStackTrace(); | |
} | |
//("Google", currentPage.getTitleText()); | |
final HtmlForm form = currentPage.getFirstByXPath("//form[@action='ClassSearchServlet']"); | |
HtmlSelect selectSubject = (HtmlSelect) currentPage.getElementByName("SUBJ"); | |
// Accounting is selected by default on the page, do not want that | |
selectSubject.setSelectedAttribute((HtmlOption) selectSubject.getOptionByValue("ACCT"), false); // Set default choice of accounting to false | |
HtmlOption optionSubject = (HtmlOption) selectSubject.getOptionByText("Art Studio"); | |
ArrayList<HtmlOption> optionArray = new ArrayList<HtmlOption>(); | |
//optionArray.add(selectSubject.getOptionByValue("CSE")); | |
selectSubject.setSelectedAttribute(optionSubject, true); | |
//form.getInputByName("SUBJ").setValueAttribute("ACCT"); | |
//final HtmlSubmitInput button = form.getInputByName("Submit"); | |
HtmlPage newPage = null; | |
try { | |
newPage = (HtmlPage) form.getInputByValue("Search").click(); | |
} catch (ElementNotFoundException e) { | |
// TODO Auto-generated catch block | |
e.printStackTrace(); | |
} catch (IOException e) { | |
// TODO Auto-generated catch block | |
e.printStackTrace(); | |
} | |
String html = newPage.asXml(); | |
showResults(html); | |
/* final HtmlTable table = newPage.getHtmlElementById("resulttable"); | |
for (final HtmlTableRow row : table.getRows()) { | |
System.out.println("Found row"); | |
for (final HtmlTableCell cell : row.getCells()) { | |
System.out.println(" Found cell: " + cell.asText()); | |
} | |
}*/ | |
} | |
public void showResults(String html) { | |
String crn; | |
String title; | |
String time; | |
String instructor; | |
String op_spots; | |
String course; | |
String section; | |
Document doc = Jsoup.parse(html); | |
Element table = doc.select("table[id=resulttable]").first(); | |
Element tablebody = table.select("tbody").first(); | |
for(Element row : tablebody.select("tr")) { | |
Elements cells = row.select("td"); | |
crn = cells.get(7).text(); | |
title = cells.get(1).text(); | |
time = cells.get(10).text(); | |
instructor = cells.get(9).text(); | |
op_spots = cells.get(5).text(); | |
course = cells.get(0).text(); | |
String[] values = course.split(" "); | |
course = values[0]; | |
section = values[2]; | |
//System.out.println(course + " " + section); | |
System.out.println(course + " - Section " + section + " : " + crn + " " + title + " " + time + " " + instructor + " " + op_spots); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment